msg_tool\scripts\yuris\arc/
ypf.rs

1//! Yu-Ris Archive (.ypf)
2use super::pe;
3use crate::ext::io::*;
4use crate::ext::mutex::*;
5use crate::scripts::base::*;
6use crate::types::*;
7use crate::utils::encoding::*;
8use crate::utils::murmur2::*;
9use crate::utils::struct_pack::*;
10use crate::utils::threadpool::*;
11use anyhow::{Result, anyhow, bail};
12use clap::ValueEnum;
13use int_enum::IntEnum;
14use std::any::Any;
15use std::collections::HashMap;
16use std::hash::Hasher;
17use std::io::{Read, Seek, SeekFrom, Write};
18use std::num::NonZeroU64;
19use std::ops::DerefMut;
20use std::sync::{Arc, Mutex};
21
/// Stateless builder for Yu-Ris YPF archives.
///
/// `Default` is derived so the type can be used wherever a default-constructible
/// builder is expected; [`YpfBuilder::new`] is kept as the explicit constructor.
#[derive(Debug, Default)]
pub struct YpfBuilder {}

impl YpfBuilder {
    /// Creates a new builder. Equivalent to `YpfBuilder::default()`.
    pub fn new() -> Self {
        Self::default()
    }
}
30
31impl ScriptBuilder for YpfBuilder {
32    fn default_encoding(&self) -> Encoding {
33        Encoding::Cp932
34    }
35
36    fn default_archive_encoding(&self) -> Option<Encoding> {
37        Some(Encoding::Cp932)
38    }
39
40    fn build_script(
41        &self,
42        data: Vec<u8>,
43        _filename: &str,
44        _encoding: Encoding,
45        archive_encoding: Encoding,
46        config: &ExtraConfig,
47        _archive: Option<&Box<dyn Script>>,
48    ) -> Result<Box<dyn Script + Send + Sync>> {
49        let mut base_offset = 0;
50        if data.starts_with(b"MZ") {
51            base_offset = pe::get_base_offset(&data)?;
52        }
53        Ok(Box::new(YPF::new(
54            MemReader::new(data),
55            archive_encoding,
56            config,
57            base_offset,
58        )?))
59    }
60
61    fn build_script_from_file(
62        &self,
63        filename: &str,
64        _encoding: Encoding,
65        archive_encoding: Encoding,
66        config: &ExtraConfig,
67        _archive: Option<&Box<dyn Script>>,
68    ) -> Result<Box<dyn Script + Send + Sync>> {
69        if filename == "-" {
70            let data = crate::utils::files::read_file(filename)?;
71            let mut base_offset = 0;
72            if data.starts_with(b"MZ") {
73                base_offset = pe::get_base_offset(&data)?;
74            }
75            Ok(Box::new(YPF::new(
76                MemReader::new(data),
77                archive_encoding,
78                config,
79                base_offset,
80            )?))
81        } else {
82            let mut file = std::fs::File::open(filename)?;
83            let mut base_offset = 0;
84            if file.peek_and_equal(b"MZ").is_ok() {
85                let mp = pelite::FileMap::open(filename)?;
86                base_offset = pe::get_base_offset(&mp)?;
87            }
88            Ok(Box::new(YPF::new(
89                file,
90                archive_encoding,
91                config,
92                base_offset,
93            )?))
94        }
95    }
96
97    fn build_script_from_reader<'a>(
98        &self,
99        mut reader: Box<dyn ReadSeek + Send + Sync + 'a>,
100        _filename: &str,
101        _encoding: Encoding,
102        archive_encoding: Encoding,
103        config: &ExtraConfig,
104        _archive: Option<&Box<dyn Script>>,
105    ) -> Result<Box<dyn Script + Send + Sync + 'a>> {
106        let mut base_offset = 0;
107        if reader.peek_and_equal(b"MZ").is_ok() {
108            let mut data = Vec::new();
109            let pos = reader.stream_position()?;
110            reader.read_to_end(&mut data)?;
111            reader.seek(SeekFrom::Start(pos))?;
112            base_offset = pe::get_base_offset(&data)?;
113        }
114        Ok(Box::new(YPF::new(
115            reader,
116            archive_encoding,
117            config,
118            base_offset,
119        )?))
120    }
121
122    fn extensions(&self) -> &'static [&'static str] {
123        &["ypf", "exe"]
124    }
125
126    fn script_type(&self) -> &'static ScriptType {
127        &ScriptType::YurisYPF
128    }
129
130    fn is_this_format(&self, filename: &str, buf: &[u8], buf_len: usize) -> Option<u8> {
131        if buf_len >= 4 && buf.starts_with(b"YPF\0") {
132            return Some(20);
133        }
134        if buf_len >= 2 && buf.starts_with(b"MZ") {
135            let p = std::path::Path::new(filename);
136            if p.exists() {
137                if let Ok(file) = pelite::FileMap::open(p) {
138                    if pe::get_base_offset(&file).is_ok() {
139                        return Some(20);
140                    }
141                }
142            }
143        }
144        None
145    }
146
147    fn is_archive(&self) -> bool {
148        true
149    }
150
151    fn create_archive(
152        &self,
153        filename: &str,
154        files: &[&str],
155        encoding: Encoding,
156        config: &ExtraConfig,
157    ) -> Result<Box<dyn Archive>> {
158        let f = std::fs::File::create(filename)?;
159        let writer = std::io::BufWriter::new(f);
160        Ok(Box::new(YPFArchiveWriter::new(
161            writer, files, encoding, config,
162        )?))
163    }
164}
165
166#[repr(u8)]
167#[derive(Debug, IntEnum, Clone, Copy)]
168enum ResourceType {
169    Default,
170    BMP,
171    PNG,
172    JPG,
173    GIF,
174    WAV,
175    OGG,
176    PSD,
177    YCG,
178    PSB,
179    WAV_,
180    OGG_,
181    OPUS,
182}
183
184impl Default for ResourceType {
185    fn default() -> Self {
186        Self::Default
187    }
188}
189
190/// Map file extension to `ResourceType`.
191///
192/// When `use_new_file_type` is true, ogg/wav are mapped to the newer
193/// type values (`OGG_` / `WAV_`); otherwise they use the legacy values.
194fn get_file_type(name: &str, use_new_file_type: bool) -> ResourceType {
195    let ext = name.rsplit('.').next().unwrap_or("").to_ascii_lowercase();
196    match ext.as_str() {
197        "bmp" => ResourceType::BMP,
198        "png" => ResourceType::PNG,
199        "jpg" | "jpeg" => ResourceType::JPG,
200        "gif" => ResourceType::GIF,
201        "ycg" => ResourceType::YCG,
202        "psb" => ResourceType::PSB,
203        "wav" => {
204            if use_new_file_type {
205                ResourceType::WAV_
206            } else {
207                ResourceType::WAV
208            }
209        }
210        "ogg" => {
211            if use_new_file_type {
212                ResourceType::OGG_
213            } else {
214                ResourceType::OGG
215            }
216        }
217        "psd" => ResourceType::PSD,
218        "opus" => ResourceType::OPUS,
219        _ => ResourceType::Default,
220    }
221}
222
223#[derive(Clone, Debug)]
224struct YPFEntry {
225    name_hash: u32,
226    name: String,
227    typ: ResourceType,
228    compressed: bool,
229    size: u32,
230    compressed_size: u32,
231    offset: u64,
232    hash: Option<u32>,
233}
234
235fn get_info_as_version(info: &Option<Box<dyn Any>>) -> Result<u32> {
236    Ok(*info
237        .as_ref()
238        .ok_or_else(|| anyhow::anyhow!("info not found"))?
239        .downcast_ref()
240        .ok_or_else(|| anyhow::anyhow!("not YSTBHeader"))?)
241}
242
243impl StructPack for YPFEntry {
244    fn pack<W: Write>(
245        &self,
246        writer: &mut W,
247        big: bool,
248        encoding: Encoding,
249        info: &Option<Box<dyn std::any::Any>>,
250    ) -> Result<()> {
251        let version = get_info_as_version(info)?;
252        self.name_hash.pack(writer, big, encoding, info)?;
253        let table = if version < 500 {
254            &NAME_DEFAULT_TABLE
255        } else {
256            &NAME_V500_TABLE
257        };
258        let mut name = encode_string(encoding, &self.name, true)?;
259        if name.len() > 0xFF {
260            bail!("File name can not longer than 255 bytes.");
261        }
262        let name_len = name.len() as u8;
263        let name_len = (table
264            .iter()
265            .position(|s| *s == name_len)
266            .ok_or_else(|| anyhow!("No suitable len found in table"))?
267            as u8)
268            ^ 0xFF;
269        name_len.pack(writer, big, encoding, info)?;
270        for num in name.iter_mut() {
271            *num ^= match version {
272                290 => 64,
273                500 => 54,
274                _ => 0,
275            };
276            *num = !(*num);
277        }
278        writer.write_all(&name)?;
279        (self.typ as u8).pack(writer, big, encoding, info)?;
280        self.compressed.pack(writer, big, encoding, info)?;
281        self.size.pack(writer, big, encoding, info)?;
282        self.compressed_size.pack(writer, big, encoding, info)?;
283        if version >= 480 {
284            self.offset.pack(writer, big, encoding, info)?;
285        } else {
286            (self.offset as u32).pack(writer, big, encoding, info)?;
287        };
288        if version >= 473 {
289            let hash = self.hash.ok_or_else(|| anyhow!("hash not specified."))?;
290            hash.pack(writer, big, encoding, info)?;
291        }
292        Ok(())
293    }
294}
295
296#[derive(Copy, Clone, Debug, PartialEq, Eq, PartialOrd, Ord, ValueEnum)]
297pub enum NameHashType {
298    /// Crc32
299    Crc32,
300    /// Murmur2
301    Murmur2,
302}
303
304impl Default for NameHashType {
305    fn default() -> Self {
306        Self::Murmur2
307    }
308}
309
310#[derive(Copy, Clone, Debug, PartialEq, Eq, PartialOrd, Ord, ValueEnum)]
311pub enum DataHashType {
312    /// Adler32
313    Adler32,
314    /// Murmur2
315    Murmur2,
316    /// Xxhash32
317    Xxh32,
318}
319
320impl Default for DataHashType {
321    fn default() -> Self {
322        Self::Murmur2
323    }
324}
325
326#[derive(Debug)]
327pub struct YPF<'a, T: Read + Seek + std::fmt::Debug + 'a> {
328    #[allow(unused)]
329    version: u32,
330    entries: Vec<YPFEntry>,
331    reader: Arc<Mutex<T>>,
332    _mark: std::marker::PhantomData<&'a ()>,
333}
334
/// Name-length translation table used for archives with version < 500.
///
/// The table is the identity mapping on `0..=255` except for twelve index
/// pairs whose values are exchanged, so it is built from that description
/// at compile time instead of being spelled out.
const NAME_DEFAULT_TABLE: [u8; 256] = {
    // Pairs `(a, b)` with `table[a] == b` and `table[b] == a`.
    const SWAPPED: [(u8, u8); 12] = [
        (3, 72),
        (6, 53),
        (9, 11),
        (12, 16),
        (13, 19),
        (17, 25),
        (21, 27),
        (28, 30),
        (32, 35),
        (38, 41),
        (44, 47),
        (46, 50),
    ];
    let mut table = [0u8; 256];
    let mut i = 0;
    while i < table.len() {
        table[i] = i as u8;
        i += 1;
    }
    let mut k = 0;
    while k < SWAPPED.len() {
        let (a, b) = SWAPPED[k];
        table[a as usize] = b;
        table[b as usize] = a;
        k += 1;
    }
    table
};
350
/// Name-length translation table used for archives with version 500.
///
/// The table is the identity mapping on `0..=255` except for twelve index
/// pairs whose values are exchanged, so it is built from that description
/// at compile time instead of being spelled out.
const NAME_V500_TABLE: [u8; 256] = {
    // Pairs `(a, b)` with `table[a] == b` and `table[b] == a`.
    const SWAPPED: [(u8, u8); 12] = [
        (3, 10),
        (6, 53),
        (9, 11),
        (12, 16),
        (13, 19),
        (17, 24),
        (20, 46),
        (21, 27),
        (28, 30),
        (32, 35),
        (38, 41),
        (44, 47),
    ];
    let mut table = [0u8; 256];
    let mut i = 0;
    while i < table.len() {
        table[i] = i as u8;
        i += 1;
    }
    let mut k = 0;
    while k < SWAPPED.len() {
        let (a, b) = SWAPPED[k];
        table[a as usize] = b;
        table[b as usize] = a;
        k += 1;
    }
    table
};
366
367fn detect_hash(name: &[u8], expected: u32) -> Result<NameHashType> {
368    let mut hasher = StreamingMurmur2::new(0, name.len() as u32);
369    hasher.write(name);
370    if hasher.finish() as u32 == expected {
371        return Ok(NameHashType::Murmur2);
372    }
373    if crc32fast::hash(name) == expected {
374        return Ok(NameHashType::Crc32);
375    }
376    bail!("Unknown hash type or checksum/name is invalid/broken")
377}
378
379fn detect_data_hash<T: Read + Seek>(
380    mut stream: T,
381    size: u32,
382    expected: u32,
383) -> Result<DataHashType> {
384    let mut murmur2_hasher = StreamingMurmur2::new(0, size);
385    let mut adler32_hasher = adler::Adler32::new();
386    let mut xxh32_hasher = Xxh32::new(0);
387    let mut buf = [0; 1024];
388    loop {
389        let readed = stream.read(&mut buf)?;
390        if readed == 0 {
391            break;
392        }
393        let b = &buf[..readed];
394        murmur2_hasher.write(b);
395        adler32_hasher.write(b);
396        xxh32_hasher.write(b);
397    }
398    if murmur2_hasher.finish() as u32 == expected {
399        return Ok(DataHashType::Murmur2);
400    }
401    if adler32_hasher.finish() as u32 == expected {
402        return Ok(DataHashType::Adler32);
403    }
404    if xxh32_hasher.finish() as u32 == expected {
405        return Ok(DataHashType::Xxh32);
406    }
407    bail!("Unknown hash type or checksum/data is invalid/broken")
408}
409
410fn cal_name_hash(name: &[u8], typ: NameHashType) -> u32 {
411    match typ {
412        NameHashType::Crc32 => crc32fast::hash(name),
413        NameHashType::Murmur2 => {
414            let mut hasher = StreamingMurmur2::new(0, name.len() as u32);
415            hasher.write(name);
416            hasher.finish() as u32
417        }
418    }
419}
420
421impl<'b, T: Read + Seek + std::fmt::Debug + Send + Sync + 'b> YPF<'b, T> {
422    pub fn new(
423        mut reader: T,
424        archive_encoding: Encoding,
425        config: &ExtraConfig,
426        base_offset: u64,
427    ) -> Result<Self> {
428        if base_offset > 0 {
429            reader.seek(SeekFrom::Start(base_offset))?;
430        }
431        let mut header = [0u8; 4];
432        reader.read_exact(&mut header)?;
433        if &header != b"YPF\0" {
434            bail!("Invalid YPF archive header")
435        }
436        let version = reader.read_u32()?;
437        if !matches!(version, 234..=500) {
438            bail!("Unsupported YPF engine version: {}", version);
439        }
440        eprintln!("Yuris YPF engine version: {version}");
441        let count = reader.read_u32()?;
442        let index_size = reader.read_u32()?;
443        let mut entries = Vec::with_capacity(count as usize);
444        let table = if version < 500 {
445            &NAME_DEFAULT_TABLE
446        } else {
447            &NAME_V500_TABLE
448        };
449        let mut hash_type = None;
450        {
451            let mut index = StreamRegion::new(&mut reader, 0x20, index_size as u64)?;
452            for _ in 0..count {
453                let hash = index.read_u32()?;
454                let length = table[(index.read_u8()? ^ 0xff) as usize];
455                let mut name = index.read_exact_vec(length as usize)?;
456                for num in name.iter_mut() {
457                    *num = !(*num);
458                    *num ^= match version {
459                        290 => 64,
460                        500 => 54,
461                        _ => 0,
462                    };
463                }
464                if config.yuris_check_hash {
465                    if let Some(hash_type) = hash_type {
466                        let thash = cal_name_hash(&name, hash_type);
467                        if hash != thash {
468                            let name = decode_to_string(archive_encoding, &name, false)?;
469                            bail!(
470                                "checksum/name is invalid/broken for {name}. expected hash: {hash:08X}, actual: {thash:08X}"
471                            );
472                        }
473                    } else {
474                        let typ = detect_hash(&name, hash)?;
475                        eprintln!("Detected name hash type: {:?}", typ);
476                        hash_type = Some(typ);
477                    }
478                }
479                let name = decode_to_string(archive_encoding, &name, true)?;
480                entries.push(YPFEntry {
481                    name_hash: hash,
482                    name: name.clone(),
483                    typ: index
484                        .read_u8()?
485                        .try_into()
486                        .map_err(|e| anyhow!("Unknown entry type for {name}: {}", e))?,
487                    compressed: index.read_u8()? != 0,
488                    size: index.read_u32()?,
489                    compressed_size: index.read_u32()?,
490                    offset: if version >= 480 {
491                        index.read_u64()?
492                    } else {
493                        index.read_u32()? as u64
494                    },
495                    hash: if version >= 473 {
496                        Some(index.read_u32()?)
497                    } else {
498                        None
499                    },
500                })
501            }
502        }
503        if config.yuris_debug_archive {
504            println!("Entries in yuris YPF: {:#?}", entries);
505            let _ = std::io::stdout().flush();
506        }
507        if config.yuris_check_hash {
508            let mut data_hash_type = None;
509            for entry in &entries {
510                let hash = match entry.hash {
511                    Some(hash) if hash != 0 => hash,
512                    _ => continue,
513                };
514                let mut stream = StreamRegion::new(
515                    &mut reader,
516                    entry.offset,
517                    entry.offset + entry.compressed_size as u64,
518                )?;
519                if let Some(hash_type) = data_hash_type {
520                    let mut hasher: Box<dyn Hasher> = match hash_type {
521                        DataHashType::Adler32 => Box::new(adler::Adler32::new()),
522                        DataHashType::Murmur2 => {
523                            Box::new(StreamingMurmur2::new(0, entry.compressed_size))
524                        }
525                        DataHashType::Xxh32 => Box::new(Xxh32::new(0)),
526                    };
527                    let mut buf = [0; 1024];
528                    loop {
529                        let readed = stream.read(&mut buf)?;
530                        if readed == 0 {
531                            break;
532                        }
533                        hasher.write(&buf[..readed]);
534                    }
535                    let thash = hasher.finish() as u32;
536                    if thash != hash {
537                        bail!(
538                            "checksum/data is invalid/broken for {}. expected hash: {hash:08X}, actual: {thash:08X}",
539                            entry.name
540                        );
541                    }
542                } else {
543                    let typ = detect_data_hash(stream, entry.compressed_size, hash)?;
544                    eprintln!("Detected data hash type: {:?}", typ);
545                    data_hash_type = Some(typ);
546                }
547            }
548        }
549        Ok(Self {
550            version,
551            entries,
552            reader: Arc::new(Mutex::new(reader)),
553            _mark: std::marker::PhantomData,
554        })
555    }
556}
557
558impl<'b, T: Read + Seek + std::fmt::Debug + Send + Sync + 'b> Script for YPF<'b, T> {
559    fn default_output_script_type(&self) -> OutputScriptType {
560        OutputScriptType::Json
561    }
562
563    fn default_format_type(&self) -> FormatOptions {
564        FormatOptions::None
565    }
566
567    fn is_archive(&self) -> bool {
568        true
569    }
570
571    fn iter_archive_filename<'a>(
572        &'a self,
573    ) -> Result<Box<dyn Iterator<Item = Result<String>> + 'a>> {
574        Ok(Box::new(self.entries.iter().map(|s| Ok(s.name.clone()))))
575    }
576
577    fn iter_archive_offset<'a>(&'a self) -> Result<Box<dyn Iterator<Item = Result<u64>> + 'a>> {
578        Ok(Box::new(self.entries.iter().map(|s| Ok(s.offset))))
579    }
580
581    fn open_file<'a>(&'a self, index: usize) -> Result<Box<dyn ArchiveContent + Send + Sync + 'a>> {
582        let entry = self
583            .entries
584            .get(index)
585            .ok_or_else(|| anyhow!("index out of bound"))?;
586        let mut entry = Entry {
587            entry,
588            stream: StreamRegion::with_size(
589                MutexWrapper::new(self.reader.clone(), entry.offset),
590                entry.compressed_size as u64,
591            )?,
592            cache: Mutex::new(None),
593            pos: 0,
594            script_type: None,
595        };
596        let mut buf = [0; 0x20];
597        let readed = entry.read(&mut buf)?;
598        entry.rewind()?;
599        entry.script_type = detect_script_type(&entry.entry.name, readed, &buf);
600        Ok(Box::new(entry))
601    }
602}
603
604fn detect_script_type(_filename: &str, buf_len: usize, buf: &[u8]) -> Option<ScriptType> {
605    if buf_len >= 4 {
606        if buf.starts_with(b"YSCF") {
607            return Some(ScriptType::YurisYSCFG);
608        }
609        if buf.starts_with(b"YSCM") {
610            return Some(ScriptType::YurisYSCM);
611        }
612        if buf.starts_with(b"YSER") {
613            return Some(ScriptType::YurisYSER);
614        }
615        if buf.starts_with(b"YSLB") {
616            return Some(ScriptType::YurisYSLB);
617        }
618        if buf.starts_with(b"YSTB") {
619            return Some(ScriptType::YurisYSTB);
620        }
621        if buf.starts_with(b"YSTD") {
622            return Some(ScriptType::YurisYSTD);
623        }
624        if buf.starts_with(b"YSTL") {
625            return Some(ScriptType::YurisYSTL);
626        }
627        if buf.starts_with(b"YSVR") {
628            return Some(ScriptType::YurisYSVR);
629        }
630    }
631    #[cfg(feature = "yuris-img")]
632    if buf_len >= 12 && buf.starts_with(b"YDG\0YU-RIS\0\0") {
633        return Some(ScriptType::YurisYDG);
634    }
635    None
636}
637
638#[derive(Debug)]
639struct Entry<'a, T: Read + Seek + std::fmt::Debug + Send + Sync + 'a> {
640    entry: &'a YPFEntry,
641    stream: StreamRegion<MutexWrapper<T>>,
642    cache: Mutex<Option<Box<dyn ReadDebug + Send + Sync + 'a>>>,
643    pos: u64,
644    script_type: Option<ScriptType>,
645}
646
647impl<'b, T: Read + Seek + std::fmt::Debug + Send + Sync + 'b> ArchiveContent for Entry<'b, T> {
648    fn name(&self) -> &str {
649        &self.entry.name
650    }
651
652    fn size(&self) -> Option<u64> {
653        Some(self.entry.compressed_size as u64)
654    }
655
656    fn script_type(&self) -> Option<&ScriptType> {
657        self.script_type.as_ref()
658    }
659
660    fn to_data<'a>(&'a mut self) -> Result<Box<dyn ReadSeek + Send + Sync + 'a>> {
661        Ok(Box::new(self))
662    }
663}
664
665impl<'a, T: Read + Seek + std::fmt::Debug + Send + Sync + 'a> Read for Entry<'a, T> {
666    fn read(&mut self, buf: &mut [u8]) -> std::io::Result<usize> {
667        if self.entry.compressed {
668            let mut lock = self.cache.lock().map_err(|_| {
669                std::io::Error::new(std::io::ErrorKind::Other, "Failed to lock the mutex")
670            })?;
671            if let Some(cache) = lock.as_mut() {
672                let readed = cache.read(buf)?;
673                self.pos += readed as u64;
674                return Ok(readed);
675            }
676            self.stream.rewind()?;
677            let mut cache = Box::new(flate2::read::ZlibDecoder::new(self.stream.clone()))
678                as Box<dyn ReadDebug + Send + Sync + 'a>;
679            if self.pos > 0 {
680                cache.skip(self.pos)?;
681            }
682            let readed = cache.read(buf)?;
683            self.pos += readed as u64;
684            lock.replace(cache);
685            Ok(readed)
686        } else {
687            self.stream.read(buf)
688        }
689    }
690}
691
692impl<'a, T: Read + Seek + std::fmt::Debug + Send + Sync + 'a> Seek for Entry<'a, T> {
693    fn seek(&mut self, pos: std::io::SeekFrom) -> std::io::Result<u64> {
694        if self.entry.compressed {
695            let new_pos = match pos {
696                SeekFrom::Start(p) => p,
697                SeekFrom::End(offset) => {
698                    if offset < 0 {
699                        if (-offset) as u64 > self.entry.size as u64 {
700                            return Err(std::io::Error::new(
701                                std::io::ErrorKind::InvalidInput,
702                                "Seek from end exceeds file length",
703                            ));
704                        }
705                        self.entry.size as u64 - (-offset) as u64
706                    } else {
707                        self.entry.size as u64 + offset as u64
708                    }
709                }
710                SeekFrom::Current(offset) => {
711                    if offset < 0 {
712                        if (-offset) as u64 > self.pos {
713                            return Err(std::io::Error::new(
714                                std::io::ErrorKind::InvalidInput,
715                                "Seek from current exceeds file start",
716                            ));
717                        }
718                        self.pos - (-offset) as u64
719                    } else {
720                        self.pos + offset as u64
721                    }
722                }
723            };
724            let mut lock = self.cache.lock().map_err(|_| {
725                std::io::Error::new(std::io::ErrorKind::Other, "Failed to lock the mutex")
726            })?;
727            if let Some(cache) = lock.as_mut()
728                && self.pos <= new_pos
729            {
730                let to_skip = new_pos - self.pos;
731                if to_skip > 0 {
732                    cache.skip(to_skip)?;
733                }
734                self.pos = new_pos;
735                Ok(new_pos)
736            } else {
737                lock.take();
738                self.pos = new_pos;
739                Ok(new_pos)
740            }
741        } else {
742            self.stream.seek(pos)
743        }
744    }
745
746    fn stream_position(&mut self) -> std::io::Result<u64> {
747        if self.entry.compressed {
748            Ok(self.pos)
749        } else {
750            self.stream.stream_position()
751        }
752    }
753}
754
755pub struct Xxh32 {
756    inner: xxhash_rust::xxh32::Xxh32,
757}
758
759impl Xxh32 {
760    pub fn new(seed: u32) -> Self {
761        Self {
762            inner: xxhash_rust::xxh32::Xxh32::new(seed),
763        }
764    }
765}
766
767impl Hasher for Xxh32 {
768    fn write(&mut self, bytes: &[u8]) {
769        self.inner.update(bytes);
770    }
771    fn finish(&self) -> u64 {
772        self.inner.digest() as u64
773    }
774}
775
776pub struct YPFArchiveWriter<T: Write + Seek> {
777    writer: Arc<Mutex<T>>,
778    headers: Arc<Mutex<HashMap<String, YPFEntry>>>,
779    version: u32,
780    compress: bool,
781    zopfli: bool,
782    compress_level: u32,
783    zopfli_iteration_count: NonZeroU64,
784    zopfli_iterations_without_improvement: NonZeroU64,
785    zopfli_maximum_block_splits: u16,
786    runner: ThreadPool<Result<()>>,
787    data_hash: DataHashType,
788    encoding: Encoding,
789}
790
791impl<T: Write + Seek> YPFArchiveWriter<T> {
792    /// Creates a new YPF Archive Writer.
793    ///
794    /// * `writer` - The writer to write the archive to.
795    /// * `files` - The list of files to include in the archive.
796    /// * `encoding` - The encoding used for the archive.
797    /// * `config` - Extra configuration options.
798    pub fn new(
799        mut writer: T,
800        files: &[&str],
801        encoding: Encoding,
802        config: &ExtraConfig,
803    ) -> Result<Self> {
804        writer.write_all(b"YPF\0")?;
805        let version = config.yuris_ypf_version.ok_or_else(|| {
806            anyhow!("Version is required. Use --yuris-ypf-version to specify version.")
807        })?;
808        writer.write_u32(version)?;
809        let file_count = files.len() as u32;
810        writer.write_u32(file_count)?;
811        writer.write_u32(0)?; // placeholder for header size
812        writer.write_u128(0)?; // unused
813        let mut headers = HashMap::new();
814        let info = &Some(Box::new(version) as Box<dyn Any>);
815        for file in files {
816            let name = encode_string(encoding, file, true)?;
817            let mut hasher: Box<dyn Hasher> = match config.yuris_name_hash_type {
818                NameHashType::Crc32 => Box::new(crc32fast::Hasher::new()),
819                NameHashType::Murmur2 => Box::new(StreamingMurmur2::new(0, name.len() as u32)),
820            };
821            hasher.write(&name);
822            let header = YPFEntry {
823                name_hash: hasher.finish() as u32,
824                name: file.to_string(),
825                typ: get_file_type(file, config.yuris_use_new_file_type),
826                compressed: config.yuris_ypf_compress_file,
827                size: 0,
828                compressed_size: 0,
829                offset: 0,
830                hash: if version >= 473 { Some(0) } else { None },
831            };
832            header.pack(&mut writer, false, encoding, info)?;
833            headers.insert(file.to_string(), header);
834        }
835        let header_size = writer.stream_position()?;
836        writer.write_u32_at(12, header_size as u32)?;
837        Ok(Self {
838            writer: Arc::new(Mutex::new(writer)),
839            headers: Arc::new(Mutex::new(headers)),
840            version,
841            compress: config.yuris_ypf_compress_file,
842            zopfli: config.yuris_ypf_zopfli,
843            compress_level: config.zlib_compression_level,
844            zopfli_iteration_count: config.zopfli_iteration_count,
845            zopfli_iterations_without_improvement: config.zopfli_iterations_without_improvement,
846            zopfli_maximum_block_splits: config.zopfli_maximum_block_splits,
847            runner: ThreadPool::new(
848                if config.yuris_ypf_compress_file {
849                    config.yuris_ypf_workers
850                } else {
851                    1
852                },
853                Some("yuris-ypf-writer"),
854                false,
855            )?,
856            encoding,
857            data_hash: config.yuris_data_hash_type,
858        })
859    }
860
861    fn create_hasher(&self, length: u32) -> Box<dyn Hasher + Send + Sync> {
862        match self.data_hash {
863            DataHashType::Adler32 => Box::new(adler::Adler32::new()),
864            DataHashType::Murmur2 => Box::new(StreamingMurmur2::new(0, length)),
865            DataHashType::Xxh32 => Box::new(Xxh32::new(0)),
866        }
867    }
868
869    fn create_hasher2(&self) -> Box<dyn Hasher + Send + Sync> {
870        match self.data_hash {
871            DataHashType::Adler32 => Box::new(adler::Adler32::new()),
872            DataHashType::Murmur2 => Box::new(Murmur2::new(0)),
873            DataHashType::Xxh32 => Box::new(Xxh32::new(0)),
874        }
875    }
876}
877
878impl<T: Write + Seek + Send + Sync + 'static> Archive for YPFArchiveWriter<T> {
879    fn new_file<'a>(
880        &'a mut self,
881        name: &str,
882        size: Option<u64>,
883    ) -> Result<Box<dyn WriteSeek + 'a>> {
884        let inner = self.new_file_non_seek(name, size)?;
885        Ok(Box::new(Writer {
886            inner,
887            mem: MemWriter::new(),
888        }))
889    }
890
891    fn new_file_non_seek<'a>(
892        &'a mut self,
893        name: &str,
894        size: Option<u64>,
895    ) -> Result<Box<dyn Write + 'a>> {
896        let mut entry = self
897            .headers
898            .lock_blocking()
899            .get(name)
900            .ok_or_else(|| anyhow::anyhow!("File '{}' not found in archive", name))?
901            .clone();
902        if self.compress {
903            let (reader, writer) = std::io::pipe()?;
904            let file = self.writer.clone();
905            let headers = self.headers.clone();
906            let compress_level = self.compress_level;
907            let name = name.to_owned();
908            let zopfli = self.zopfli;
909            let iteration_count = self.zopfli_iteration_count;
910            let iterations_without_improvement = self.zopfli_iterations_without_improvement;
911            let maximum_block_splits = self.zopfli_maximum_block_splits;
912            let data_hash = self.data_hash;
913            self.runner.execute(
914                move |_| {
915                    let mut tsize = 0;
916                    let mut reader = TrackStream::new(reader, &mut tsize);
917                    let mut data = Vec::new();
918                    reader.read_to_end(&mut data)?;
919                    if entry.compressed {
920                        let mut compressed = MemWriter::new();
921                        if zopfli {
922                            let mut encoder = zopfli::ZlibEncoder::new(
923                                zopfli::Options {
924                                    iteration_count,
925                                    iterations_without_improvement,
926                                    maximum_block_splits,
927                                },
928                                zopfli::BlockType::Dynamic,
929                                &mut compressed,
930                            )?;
931                            // std::io::copy(&mut reader, &mut encoder)?;
932                            encoder.write_all(&data)?;
933                            encoder.finish()?;
934                        } else {
935                            let mut encoder = flate2::write::ZlibEncoder::new(
936                                &mut compressed,
937                                flate2::Compression::new(compress_level),
938                            );
939                            // std::io::copy(&mut reader, &mut encoder)?;
940                            encoder.write_all(&data)?;
941                            encoder.finish()?;
942                        }
943                        data = compressed.into_inner();
944                    }
945                    entry.size = tsize as u32;
946                    entry.compressed_size = data.len() as u32;
947                    if let Some(hash) = entry.hash.as_mut() {
948                        let mut hasher: Box<dyn Hasher> = match data_hash {
949                            DataHashType::Adler32 => Box::new(adler::Adler32::new()),
950                            DataHashType::Murmur2 => {
951                                Box::new(StreamingMurmur2::new(0, entry.compressed_size))
952                            }
953                            DataHashType::Xxh32 => Box::new(Xxh32::new(0)),
954                        };
955                        hasher.write(&data);
956                        *hash = hasher.finish() as u32;
957                    }
958                    let mut writer = file.lock_blocking();
959                    entry.offset = writer.seek(SeekFrom::End(0))?;
960                    writer.write_all(&data)?;
961                    headers.lock_blocking().insert(name, entry);
962                    Ok(())
963                },
964                true,
965            )?;
966            Ok(Box::new(writer))
967        } else {
968            let mut writer = self.writer.lock_blocking();
969            entry.offset = writer.seek(SeekFrom::End(0))?;
970            Ok(Box::new(YPFArchiveFile {
971                entry,
972                writer: self.writer.clone(),
973                pos: 0,
974                headers: self.headers.clone(),
975                hasher: if let Some(size) = size {
976                    self.create_hasher(size as u32)
977                } else {
978                    self.create_hasher2()
979                },
980            }))
981        }
982    }
983
984    fn write_header(&mut self) -> Result<()> {
985        self.runner.join();
986        for err in self.runner.take_results() {
987            err?;
988        }
989        let mut writer = self.writer.lock_blocking();
990        let headers = self.headers.lock_blocking();
991        writer.seek(SeekFrom::Start(0x20))?;
992        let mut files = headers.iter().map(|(_, d)| d).collect::<Vec<_>>();
993        files.sort_by_key(|f| f.offset);
994        let info = &Some(Box::new(self.version) as Box<dyn Any>);
995        for file in files {
996            file.pack(writer.deref_mut(), false, self.encoding, info)?;
997        }
998        Ok(())
999    }
1000}
1001
1002struct YPFArchiveFile<T: Write + Seek> {
1003    entry: YPFEntry,
1004    writer: Arc<Mutex<T>>,
1005    pos: usize,
1006    headers: Arc<Mutex<HashMap<String, YPFEntry>>>,
1007    hasher: Box<dyn Hasher + Send + Sync>,
1008}
1009
1010impl<T: Write + Seek> Write for YPFArchiveFile<T> {
1011    fn write(&mut self, buf: &[u8]) -> std::io::Result<usize> {
1012        let mut writer = self.writer.lock().map_err(|_| {
1013            std::io::Error::new(std::io::ErrorKind::Other, "Failed to lock the mutex")
1014        })?;
1015        writer.seek(SeekFrom::Start(self.entry.offset + self.pos as u64))?;
1016        let bytes_written = writer.write(buf)?;
1017        self.pos += bytes_written;
1018        self.entry.size = self.entry.size.max(self.pos as u32);
1019        self.hasher.write(&buf[..bytes_written]);
1020        Ok(bytes_written)
1021    }
1022
1023    fn flush(&mut self) -> std::io::Result<()> {
1024        self.writer
1025            .lock()
1026            .map_err(|_| {
1027                std::io::Error::new(std::io::ErrorKind::Other, "Failed to lock the mutex")
1028            })?
1029            .flush()
1030    }
1031}
1032
1033impl<T: Write + Seek> Drop for YPFArchiveFile<T> {
1034    fn drop(&mut self) {
1035        self.entry.compressed_size = self.entry.size;
1036        if let Some(hash) = self.entry.hash.as_mut() {
1037            *hash = self.hasher.finish() as u32;
1038        }
1039        self.headers
1040            .lock_blocking()
1041            .insert(self.entry.name.clone(), self.entry.clone());
1042    }
1043}
1044
1045struct Writer<'a> {
1046    inner: Box<dyn Write + 'a>,
1047    mem: MemWriter,
1048}
1049
1050impl std::fmt::Debug for Writer<'_> {
1051    fn fmt(&self, f: &mut std::fmt::Formatter<'_>) -> std::fmt::Result {
1052        f.debug_struct("Writer").field("mem", &self.mem).finish()
1053    }
1054}
1055
1056impl<'a> Write for Writer<'a> {
1057    fn write(&mut self, buf: &[u8]) -> std::io::Result<usize> {
1058        self.mem.write(buf)
1059    }
1060
1061    fn flush(&mut self) -> std::io::Result<()> {
1062        self.mem.flush()
1063    }
1064}
1065
1066impl<'a> Seek for Writer<'a> {
1067    fn seek(&mut self, pos: std::io::SeekFrom) -> std::io::Result<u64> {
1068        self.mem.seek(pos)
1069    }
1070
1071    fn stream_position(&mut self) -> std::io::Result<u64> {
1072        self.mem.stream_position()
1073    }
1074
1075    fn rewind(&mut self) -> std::io::Result<()> {
1076        self.mem.rewind()
1077    }
1078}
1079
1080impl<'a> Drop for Writer<'a> {
1081    fn drop(&mut self) {
1082        let _ = self.inner.write_all(&self.mem.data);
1083        let _ = self.inner.flush();
1084    }
1085}